import mandb
import jieba
import jieba.posseg
import jieba.analyse
import csv



# Emit an empty line on stdout when the module is loaded.
print()

def ManDB(func=None, param=None):
    """Decorator that injects a fresh ``mandb.DB()`` as the first argument.

    Usable both bare (``@ManDB``) and called (``@ManDB()``).  Every call of
    the decorated function creates a new ``mandb.DB()`` instance and passes
    it ahead of the caller's own positional and keyword arguments.

    The wrapper is built with ``functools.wraps`` so the decorated function
    keeps its ``__name__``, ``__doc__``, ``__qualname__`` and ``__module__``
    and exposes the original callable as ``__wrapped__``.

    Args:
        func: The function to decorate when used as ``@ManDB``; left as
            ``None`` when used as ``@ManDB(...)``.
        param: Accepted for call-style use but not read by this decorator.

    Returns:
        The wrapped function when ``func`` is given, otherwise the decorator
        that will wrap the function it is applied to.
    """
    import functools  # local import keeps this block self-contained

    def deco(target):
        @functools.wraps(target)
        def wrapper(*args, **kwargs):
            # A new DB object is created per call, not once at decoration time.
            man_db = mandb.DB()
            return target(man_db, *args, **kwargs)
        return wrapper

    # Same dispatch as before: any falsy ``func`` means "return the decorator".
    if not func:
        return deco
    return deco(func)


@ManDB
def test(man_db):
    """Print the value returned by the injected handle's ``test()`` method."""
    outcome = man_db.test()
    print(outcome)



@ManDB
def get_title(man_db):
	"""Run a ``SELECT title`` query against table ``szys_list``.

	``man_db`` is supplied by the ``@ManDB`` decorator, so callers invoke
	``get_title()`` without arguments.

	Returns:
		Whatever ``man_db.execute_seles`` returns for the query
		(presumably an iterable of rows; confirm against ``mandb``).
	"""
	# The backslash continues the string literal onto the next line, so that
	# line's leading whitespace is part of the SQL text sent to the database.
	sql = "SELECT title \
			FROM `szys_list` ;"
	return_datas = man_db.execute_seles(sql)
	return return_datas

# Runs at module load: creates a mandb.DB() via the decorator and prints
# the result of its test() method.
test()
# Disabled: print everything returned by get_title().
#print(get_title())




# CSV file that fenci_run() appends extracted keywords to.
csv_data_file = "F:/fenci_data_1.csv"


def fenci_run(s, out_path=None):
    """Extract TextRank keywords from ``s`` and append them to a CSV file.

    Each keyword produced by ``jieba.analyse.textrank`` is printed and then
    appended as a single-column row (``excel`` dialect, UTF-8).  The output
    file is opened once for the whole batch instead of once per keyword, and
    it is not opened at all when no keyword is extracted.

    Args:
        s: Text to analyse.
        out_path: CSV file to append to.  Defaults to the module-level
            ``csv_data_file`` when omitted.
    """
    target = csv_data_file if out_path is None else out_path

    # Materialise first so an empty result can be detected before opening
    # (and thereby creating) the output file.
    keywords = list(jieba.analyse.textrank(s, withWeight=True))
    if not keywords:
        return

    with open(target, 'a', newline='', encoding='utf-8') as out:
        writer = csv.writer(out, dialect='excel')
        # The weight is requested from jieba but only the word is stored.
        for word, _weight in keywords:
            print(word)
            writer.writerow([word])

#fenci_run("此外，公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元，增资后，吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年，实现营业收入0万元，实现净利润-139.13万元。")

# Disabled driver kept as an inert string literal: it would feed every row
# returned by get_title() through fenci_run().
'''
for datas in get_title():
	print(datas[0])
	fenci_run(datas[0])
'''


def fenci_uniq(csv_path, csv_data_file):
    """Append the distinct first-column values of one CSV file to another.

    Rows of ``csv_path`` are read in order; NUL characters are stripped from
    every line before it reaches the CSV parser.  The first time a
    first-column value is seen it is appended to ``csv_data_file`` as a
    single-column row (``excel`` dialect); later occurrences are reported as
    duplicates and skipped.  Rows with no fields at all (for example blank
    lines) are reported and skipped.  De-duplication only covers values seen
    during this call, not rows already present in ``csv_data_file``.

    For every input row the 1-based row number is printed, followed by a
    status message.

    Args:
        csv_path: Path of the UTF-8 CSV file to read.
        csv_data_file: Path of the UTF-8 CSV file to append to.  It is opened
            once in append mode for the whole run, so it is created if it
            does not exist yet.

    Raises:
        OSError: If either file cannot be opened, read or written.
        csv.Error: If the input cannot be parsed as CSV.
    """
    seen = set()  # O(1) membership instead of scanning a growing list
    with open(csv_path, 'r', encoding='utf-8') as src, \
            open(csv_data_file, 'a', newline='', encoding='utf-8') as dst:
        writer = csv.writer(dst, dialect='excel')
        rows = csv.reader((line.replace('\0', '') for line in src), delimiter=",")
        # enumerate keeps the printed number in step with the input row even
        # when a row is skipped.
        for number, row in enumerate(rows, start=1):
            print(str(number))

            # An empty row has no first column; report it and move on rather
            # than relying on a catch-all exception handler.
            if not row:
                print("数据存在遗漏不写入数据库")
                continue

            value = row[0]
            if value in seen:
                print("重复数据")
                continue

            print("新数据")
            seen.add(value)
            writer.writerow([value])


# Input for the de-duplication pass (same path as csv_data_file).
cn_1 = "F:/fenci_data_1.csv"
# Output file that receives each distinct first-column value once.
cn_2 = "F:/fenci_data_todb-1.csv"

# Runs at module load: de-duplicate cn_1 into cn_2.
fenci_uniq(cn_1,cn_2)




# Disabled jieba walkthrough kept as an inert string literal; nothing below
# executes. It covers segmentation modes, dictionary frequency tuning,
# keyword extraction (TF-IDF and TextRank), part-of-speech tagging and
# tokenize() offsets.
'''
print('='*40)
print('1. 分词')
print('-'*40)
 
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list))  # 全模式
 
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))  # 默认模式
 
seg_list = jieba.cut("他来到了网易杭研大厦")
print(", ".join(seg_list))
 
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")  # 搜索引擎模式
print(", ".join(seg_list))
 
print('='*40)
print('2. 添加自定义词典/调整词典')
print('-'*40)
 
print('/'.join(jieba.cut('如果放到post中将出错。', HMM=False)))
#如果/放到/post/中将/出错/。
print(jieba.suggest_freq(('中', '将'), True))
#494
print('/'.join(jieba.cut('如果放到post中将出错。', HMM=False)))
#如果/放到/post/中/将/出错/。
print('/'.join(jieba.cut('「台中」正确应该不会被切开', HMM=False)))
#「/台/中/」/正确/应该/不会/被/切开
print(jieba.suggest_freq('台中', True))
#69
print('/'.join(jieba.cut('「台中」正确应该不会被切开', HMM=False)))
#「/台中/」/正确/应该/不会/被/切开
 
print('='*40)
print('3. 关键词提取')
print('-'*40)
print(' TF-IDF')
print('-'*40)
 
s = "此外，公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元，增资后，吉林欧亚置业注册资本由7000万元增加到5亿元。吉林欧亚置业主要经营范围为房地产开发及百货零售等业务。目前在建吉林欧亚城市商业综合体项目。2013年，实现营业收入0万元，实现净利润-139.13万元。"
for x, w in jieba.analyse.extract_tags(s, withWeight=True):
    print('%s %s' % (x, w))
 
print('-'*40)
print(' TextRank')
print('-'*40)
 
for x, w in jieba.analyse.textrank(s, withWeight=True):
    print('%s %s' % (x, w))
 
print('='*40)
print('4. 词性标注')
print('-'*40)
 
words = jieba.posseg.cut("我爱北京天安门")
for word, flag in words:
    print('%s %s' % (word, flag))
 
print('='*40)
print('6. Tokenize: 返回词语在原文的起止位置')
print('-'*40)
print(' 默认模式')
print('-'*40)
 
result = jieba.tokenize('永和服装饰品有限公司')
for tk in result:
    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
 
print('-'*40)
print(' 搜索模式')
print('-'*40)
 
result = jieba.tokenize('永和服装饰品有限公司', mode='search')
for tk in result:
    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
'''




